#select datetime, userid, variablecode,"value", locationid, datasourcecode from observations;
#select datetime, forecastdatetime, regionid, variablecode,"value", datasourcecode from forecasts;
#select "id", phonenumber, regionid,locationid, roles,firstname, lastname from users;

#outputs of this script:
# - split SF and observations from all communities
# - best farmer forecasts from every community

import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from math import sqrt

#read the four CSV exports (made with pgAdmin) into DataFrames, in this order:
#regions, meteoblue forecasts, user input (observations + farmer forecasts), users
regions, forecasts, user_input, users = (
    pd.read_csv(csv_path)
    for csv_path in (
        "C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/regions (meteoblue).csv",
        "C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/data-1662467091031 - meteoblue_forecasts.csv",
        "C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/data-1662467666108 - observations.csv",
        "C:/Users/joepb/PycharmProjects/data_storage/data_DROP_app/Users.csv",
    )
)


#categorise precipitation in forecasts
# Vectorised replacement for the former row-by-row loop. The first matching
# condition wins, which reproduces the categories the loop ended up with:
#   value == 0        -> 0
#   value <= 5        -> 1   (negative values also land here, as before)
#   5 < value <= 10   -> 2
#   10 < value <= 20  -> 3   (exactly 20 is category 3, as before)
#   value > 20        -> 4
#   NaN               -> NaN (no category assigned)
# The category is assigned to every row, regardless of its variablecode.
_forecast_values = forecasts['value']
forecasts['cat'] = np.select(
    [
        _forecast_values == 0,
        _forecast_values <= 5,
        _forecast_values <= 10,
        _forecast_values <= 20,
        _forecast_values > 20,
    ],
    [0, 1, 2, 3, 4],
    default=np.nan,
)

# parse the timestamp columns, then split the user input by data source
# into observations and farmer (if) forecasts
for _time_column in ('datetime', 'forecastdatetime'):
    forecasts[_time_column] = pd.to_datetime(forecasts[_time_column])
user_input['datetime'] = pd.to_datetime(user_input['datetime'])

_data_source = user_input['datasourcecode']
observations = user_input[_data_source == 'user_local_observation']
if_forecast = user_input[_data_source == 'user_local_forecast']


#If you want to analyse all data for ghana together (not used in my thesis)
regions_ghana = [2,3,4,6,7,8]
users_ghana = users.loc[users['regionid'].isin(regions_ghana)]

#farmer forecasts and observations submitted from a location that belongs to one of the ghana users
if_forecasts_total_ghana = if_forecast.loc[if_forecast['locationid'].isin(users_ghana['locationid'])]
observations_ghana = observations.loc[observations['locationid'].isin(users_ghana['locationid'])]

#export the individual farmer forecasts before they are averaged
if_forecasts_total_ghana.to_csv('C:/Users/joepb/Downloads/farmer_forecasts_ghana.csv')

#average all farmer forecasts with the same datetime.
#mean(numeric_only=True) averages the numeric columns only; aggregate(np.mean)
#raises a TypeError on the text columns (variablecode, datasourcecode) in pandas >= 2.0
if_forecasts_total_ghana = if_forecasts_total_ghana.groupby('datetime').mean(numeric_only=True)

#The information below is regarding which farmer was in charge of the rain gauge in 2022, and from what date onwards the rain gauge was installed

#rain-gauge observations Nakpanzoo: +233 243741097 (mohammed) (from 28th of july) dr. Bizoola placed them.
#USERID / LOCATION ID NAKPANZOO: 54/103, region 2
#rain-gauge observation Yapalsi: +233 592460661  Sualesu (22nd of august (approx))
#USERID / LOCATION ID YAPALSI: 112/485, region 3

#separating the data per region is done for all regions below

########
#Gbulung-----------------------------------------------------------------------------------------------------
########

#rain gauge gbulung
#midstream: User id: 1581
#upstream : user id: 78 or 74
#downstream: userid: 71

users_gbulung = users.loc[users['regionid'] == 4]
forecasts_gbulung = forecasts.loc[forecasts['regionid'] == 4]

#farmer (if) forecasts submitted from the locations of the Gbulung users
if_forecast_gbulung = if_forecast.loc[if_forecast['locationid'].isin(users_gbulung['locationid'])]

#observations submitted from the locations of the Gbulung users
observations_gbulung = observations.loc[observations['locationid'].isin(users_gbulung['locationid'])]

#average the observations of all users on the same datetime into a single number.
#mean(numeric_only=True) averages the numeric columns only; aggregate(np.mean)
#raises a TypeError on the text columns (variablecode, datasourcecode) in pandas >= 2.0
avg_observations_gbulung = observations_gbulung.groupby('datetime').mean(numeric_only=True)

observers_gbulung = [1581,78,74,71]
rain_gauge_date_gbulung = pd.to_datetime('2022-09-21 00:00:00')

#Take the rain-gauge observations only after the 21 of september
# NOTE(review): the ids in observers_gbulung are listed above as user ids, yet they are
# matched against 'locationid' here (the Nakpanzoo section filters on 'userid') - confirm
# which column is intended.
Real_observations_gbulung = observations_gbulung.loc[observations_gbulung['locationid'].isin(observers_gbulung)]
Real_observations_gbulung = Real_observations_gbulung.loc[Real_observations_gbulung['datetime'] > rain_gauge_date_gbulung]
Real_observations_gbulung_avg = Real_observations_gbulung.groupby('datetime').mean(numeric_only=True)

#turn 'datetime' back into a regular column in both averaged tables
Real_observations_gbulung_avg.reset_index(inplace=True)
avg_observations_gbulung.reset_index(inplace=True)

#replace the avg values in the dataset with the raingauge data:
#the rain-gauge rows get the row labels of the rows with the same datetime in
#avg_observations_gbulung, so that DataFrame.update() overwrites exactly those rows
Replacementvalues = Real_observations_gbulung_avg.loc[Real_observations_gbulung_avg['datetime'].isin(avg_observations_gbulung['datetime'])]
Replacementvalues = Replacementvalues.set_index(avg_observations_gbulung.loc[avg_observations_gbulung.datetime.isin(Replacementvalues.datetime),:].index)
avg_observations_gbulung.update(Replacementvalues)

#filter only same and next day forecasts -> used sameday forecasts in thesis
forecasts_gbulung_sameday = forecasts_gbulung.loc[forecasts_gbulung['datetime'] == forecasts_gbulung['forecastdatetime']]
forecasts_gbulung_sameday_prec = forecasts_gbulung_sameday.loc[forecasts_gbulung_sameday['variablecode'] == 'prec']
# forecasts_gbulung_nextday = forecasts_gbulung.loc[forecasts_gbulung['datetime']  == forecasts_gbulung['forecastdatetime'] + datetime.timedelta(days = 1) ]
# forecasts_gbulung_nextday_prec = forecasts_gbulung_nextday.loc[forecasts_gbulung_nextday['variablecode'] == 'prec']

#farmers taken from the farmer forecast script
Good_farmers_gbulung = [27,69,80,96]

#forecasts of the selected farmers, joined on datetime with the same-day precipitation
#forecasts and then with the averaged observations
IF_forecasts_goodfarmers_gbulung = if_forecast_gbulung.loc[if_forecast_gbulung['locationid'].isin(Good_farmers_gbulung)]
df_goodfarmers = IF_forecasts_goodfarmers_gbulung.merge(forecasts_gbulung_sameday_prec, left_on = 'datetime', right_on = 'datetime')
df_goodfarmers_2 = df_goodfarmers.merge(avg_observations_gbulung, left_on = 'datetime', right_on=avg_observations_gbulung['datetime'])


#same-day precipitation forecasts and farmer forecasts stacked in one table
df_gbulung = pd.concat([forecasts_gbulung_sameday_prec, if_forecast_gbulung])

########
#Nakpanzoo-----------------------------------------------------------------------------------------------------
########

forecasts_nakpanzoo = forecasts.loc[forecasts['regionid'] == 2]
users_nakpanzoo = users.loc[users['regionid'] == 2]

#select all farmer (if) forecasts submitted from the locations of the Nakpanzoo users
if_forecast_nakpanzoo = if_forecast.loc[if_forecast['locationid'].isin(users_nakpanzoo['locationid'])]

#select the observations submitted from the locations of the Nakpanzoo users
observations_nakpanzoo = observations.loc[observations['locationid'].isin(users_nakpanzoo['locationid'])]

#average the observations of all users on the same datetime into a single number.
#mean(numeric_only=True) averages the numeric columns only; aggregate(np.mean)
#raises a TypeError on the text columns (variablecode, datasourcecode) in pandas >= 2.0
avg_observations_nakpanzoo = observations_nakpanzoo.groupby('datetime').mean(numeric_only=True)


#28 July 2022, written in ISO order so parsing does not depend on day-first guessing
rain_gauge_date_nakpanzoo = pd.to_datetime('2022-07-28 00:00:00')
#Take only the observations of user 54 made after that date
Real_observations_nakpanzoo = observations_nakpanzoo.loc[observations_nakpanzoo['userid']==54]
Real_observations_nakpanzoo = Real_observations_nakpanzoo.loc[Real_observations_nakpanzoo['datetime'] > rain_gauge_date_nakpanzoo]
Real_observations_nakpanzoo_avg = Real_observations_nakpanzoo.groupby('datetime').mean(numeric_only=True)

#turn 'datetime' back into a regular column in both averaged tables
Real_observations_nakpanzoo_avg.reset_index(inplace=True)
avg_observations_nakpanzoo.reset_index(inplace=True)
#replace the avg values in the dataset with the raingauge data:
#the rain-gauge rows get the row labels of the rows with the same datetime in
#avg_observations_nakpanzoo, so that DataFrame.update() overwrites exactly those rows
Replacementvalues = Real_observations_nakpanzoo_avg.loc[Real_observations_nakpanzoo_avg['datetime'].isin(avg_observations_nakpanzoo['datetime'])]
Replacementvalues = Replacementvalues.set_index(avg_observations_nakpanzoo.loc[avg_observations_nakpanzoo.datetime.isin(Replacementvalues.datetime),:].index)
avg_observations_nakpanzoo.update(Replacementvalues)

avg_observations_nakpanzoo.to_csv('C:/Users/joepb/PycharmProjects/data_storage/avg_obs_nakpanzoo.csv')

#filter only same and next day forecasts
forecasts_nakpanzoo_sameday = forecasts_nakpanzoo.loc[forecasts_nakpanzoo['datetime'] == forecasts_nakpanzoo['forecastdatetime']]
forecasts_nakpanzoo_sameday_prec = forecasts_nakpanzoo_sameday.loc[forecasts_nakpanzoo_sameday['variablecode'] == 'prec']
forecasts_nakpanzoo_nextday = forecasts_nakpanzoo.loc[forecasts_nakpanzoo['datetime']  == forecasts_nakpanzoo['forecastdatetime'] + datetime.timedelta(days = 1) ]
forecasts_nakpanzoo_nextday_prec = forecasts_nakpanzoo_nextday.loc[forecasts_nakpanzoo_nextday['variablecode'] == 'prec']


#forecasts_nakpanzoo_sameday_prec = forecasts_nakpanzoo_sameday_prec.loc[forecasts_nakpanzoo_sameday_prec['datetime'] > pd.to_datetime('2022-07-31 23:00:00')]
# forecasts_nakpanzoo_sameday_prec = forecasts_nakpanzoo_sameday_prec.loc[forecasts_nakpanzoo_sameday_prec['datetime'] < pd.to_datetime('2022-10-01 00:00:00')]
#export the same-day precipitation forecasts without the descriptive columns
forecasts_nakpanzoo_short = forecasts_nakpanzoo_sameday_prec.drop(['forecastdatetime', 'regionid', 'variablecode', 'datasourcecode'], axis= 1)
forecasts_nakpanzoo_short.to_csv('C:/Users/joepb/PycharmProjects/data_storage/forecast_MB_Nakpanzoo_short.csv')

#was used in some analysis
# forecasts_nakpanzoo_short_prob = forecasts_nakpanzoo_sameday_prob.drop(['forecastdatetime', 'regionid', 'variablecode', 'datasourcecode'], axis= 1)
# forecasts_nakpanzoo_short_prob.to_csv('C:/Users/joepb/PycharmProjects/data_storage/forecast_MB_Nakpanzoo_prob_short.csv')


#take only the observations from the source with the rain gauge
# df_obs_SF = forecasts_nakpanzoo_sameday_prec.merge(Real_observations_Nakpanzoo, left_on='datetime', right_on='observeddate')
# df_all_obs_SF = forecasts_nakpanzoo_sameday_prec.merge(avg_observations_nakpanzoo, left_on='datetime', right_on=avg_observations_nakpanzoo.index)
# df_obs_IF = if_forecast_nakpanzoo.merge(Real_observations_Nakpanzoo, left_on='datetime', right_on='observeddate')
# df_all_obs_IF = if_forecast_nakpanzoo.merge(avg_observations_nakpanzoo, left_on='datetime', right_on=avg_observations_nakpanzoo.index)


########
#yapalsi-----------------------------------------------------------------------------------------------------
########

forecasts_yapalsi = forecasts.loc[forecasts['regionid'] == 3]
users_yapalsi = users.loc[users['regionid'] == 3]

#observations and farmer (if) forecasts submitted from the locations of the Yapalsi users
observations_yapalsi = observations.loc[observations['locationid'].isin(users_yapalsi['locationid'])]

if_forecast_yapalsi = if_forecast.loc[if_forecast['locationid'].isin(users_yapalsi['locationid'])]

#average the observations of all users on the same datetime into a single number.
#mean(numeric_only=True) averages the numeric columns only; aggregate(np.mean)
#raises a TypeError on the text columns (variablecode, datasourcecode) in pandas >= 2.0
avg_observations_yapalsi = observations_yapalsi.groupby('datetime').mean(numeric_only=True)

avg_observations_yapalsi.to_csv('C:/Users/joepb/PycharmProjects/data_storage/avg_obs_yapalsi.csv')

#filter only same and next day forecasts
forecasts_yapalsi_sameday = forecasts_yapalsi.loc[forecasts_yapalsi['datetime'] == forecasts_yapalsi['forecastdatetime']]
forecasts_yapalsi_sameday_prec = forecasts_yapalsi_sameday.loc[forecasts_yapalsi_sameday['variablecode'] == 'prec']
# forecasts_yapalsi_nextday = forecasts_yapalsi.loc[forecasts_yapalsi['datetime']  == forecasts_yapalsi['forecastdatetime'] + datetime.timedelta(days = 1) ]
# forecasts_yapalsi_nextday_prec = forecasts_yapalsi_nextday.loc[forecasts_yapalsi_nextday['variablecode'] == 'prec']

#Almost no observations done by the responsible farmer...
#observations from location 485 only
Real_observations_yapalsi = observations_yapalsi.loc[observations_yapalsi['locationid']== 485]


########
#Nabogu-----------------------------------------------------------------------------------------------------
########
forecasts_nabogu = forecasts.loc[forecasts['regionid'] == 8]
users_nabogu = users.loc[users['regionid'] == 8]

#observations and farmer (if) forecasts submitted from the locations of the Nabogu users
observations_nabogu = observations.loc[observations['locationid'].isin(users_nabogu['locationid'])]

if_forecast_nabogu = if_forecast.loc[if_forecast['locationid'].isin(users_nabogu['locationid'])]

#average the observations of all users on the same datetime into a single number.
#mean(numeric_only=True) averages the numeric columns only; aggregate(np.mean)
#raises a TypeError on the text columns (variablecode, datasourcecode) in pandas >= 2.0
avg_observations_nabogu = observations_nabogu.groupby('datetime').mean(numeric_only=True)

#filter only same and next day forecasts
forecasts_nabogu_sameday = forecasts_nabogu.loc[forecasts_nabogu['datetime'] == forecasts_nabogu['forecastdatetime']]
forecasts_nabogu_sameday_prec = forecasts_nabogu_sameday.loc[forecasts_nabogu_sameday['variablecode'] == 'prec']
forecasts_nabogu_nextday = forecasts_nabogu.loc[forecasts_nabogu['datetime']  == forecasts_nabogu['forecastdatetime'] + datetime.timedelta(days = 1) ]
forecasts_nabogu_nextday_prec = forecasts_nabogu_nextday.loc[forecasts_nabogu_nextday['variablecode'] == 'prec']

# df_all_obs_SF = forecasts_nabogu_sameday_prec.merge(avg_observations_nabogu, left_on='datetime', right_on=avg_observations_nabogu.index)
# df_all_obs_IF = if_forecast_nabogu.merge(avg_observations_nabogu, left_on='datetime', right_on=avg_observations_nabogu.index)

########
#Gushie-----------------------------------------------------------------------------------------------------
########

forecasts_gushie = forecasts.loc[forecasts['regionid'] == 6]
users_gushie = users.loc[users['regionid'] == 6]

#observations and farmer (if) forecasts submitted from the locations of the Gushie users
observations_gushie = observations.loc[observations['locationid'].isin(users_gushie['locationid'])]

if_forecast_gushie = if_forecast.loc[if_forecast['locationid'].isin(users_gushie['locationid'])]

#average the observations of all users on the same datetime into a single number.
#mean(numeric_only=True) averages the numeric columns only; aggregate(np.mean)
#raises a TypeError on the text columns (variablecode, datasourcecode) in pandas >= 2.0
avg_observations_gushie = observations_gushie.groupby('datetime').mean(numeric_only=True)

#filter only same and next day forecasts
forecasts_gushie_sameday = forecasts_gushie.loc[forecasts_gushie['datetime'] == forecasts_gushie['forecastdatetime']]
forecasts_gushie_sameday_prec = forecasts_gushie_sameday.loc[forecasts_gushie_sameday['variablecode'] == 'prec']
forecasts_gushie_nextday = forecasts_gushie.loc[forecasts_gushie['datetime']  == forecasts_gushie['forecastdatetime'] + datetime.timedelta(days = 1) ]
forecasts_gushie_nextday_prec = forecasts_gushie_nextday.loc[forecasts_gushie_nextday['variablecode'] == 'prec']

# NOTE(review): 485 is noted earlier in this file as the Yapalsi (region 3) location id, and
# observations_gushie only holds locations of region-6 users, so this selection is presumably
# empty (copy-paste?) - confirm the intended location id for Gushie.
Real_observations_gushie = observations_gushie.loc[observations_gushie['locationid']== 485]

# df_all_obs_SF = forecasts_gushie_sameday_prec.merge(avg_observations_gushie, left_on='datetime', right_on=avg_observations_gushie.index)
# df_all_obs_IF = if_forecast_gushie.merge(avg_observations_gushie, left_on='datetime', right_on=avg_observations_gushie.index)

########
#Diare-----------------------------------------------------------------------------------------------------
########

forecasts_diare = forecasts.loc[forecasts['regionid'] == 7]
users_diare = users.loc[users['regionid'] == 7]

#observations and farmer (if) forecasts submitted from the locations of the Diare users
observations_diare = observations.loc[observations['locationid'].isin(users_diare['locationid'])]

if_forecast_diare = if_forecast.loc[if_forecast['locationid'].isin(users_diare['locationid'])]

#average the observations of all users on the same datetime into a single number.
#mean(numeric_only=True) averages the numeric columns only; aggregate(np.mean)
#raises a TypeError on the text columns (variablecode, datasourcecode) in pandas >= 2.0
avg_observations_diare = observations_diare.groupby('datetime').mean(numeric_only=True)

#filter only same and next day forecasts
forecasts_diare_sameday = forecasts_diare.loc[forecasts_diare['datetime'] == forecasts_diare['forecastdatetime']]
forecasts_diare_sameday_prec = forecasts_diare_sameday.loc[forecasts_diare_sameday['variablecode'] == 'prec']
forecasts_diare_nextday = forecasts_diare.loc[forecasts_diare['datetime']  == forecasts_diare['forecastdatetime'] + datetime.timedelta(days = 1) ]
forecasts_diare_nextday_prec = forecasts_diare_nextday.loc[forecasts_diare_nextday['variablecode'] == 'prec']

# NOTE(review): 485 is noted earlier in this file as the Yapalsi (region 3) location id, and
# observations_diare only holds locations of region-7 users, so this selection is presumably
# empty (copy-paste?) - confirm the intended location id for Diare.
Real_observations_diare = observations_diare.loc[observations_diare['locationid']== 485]

# df_all_obs_SF = forecasts_diare_sameday_prec.merge(avg_observations_diare, left_on='datetime', right_on=avg_observations_diare.index)
# df_all_obs_IF = if_forecast_diare.merge(avg_observations_diare, left_on='datetime', right_on=avg_observations_diare.index)


